library(plotly)
## Warning: package 'plotly' was built under R version 4.2.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.1
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(zoo)
## Warning: package 'zoo' was built under R version 4.2.1
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggplot2)
library(DSIWastewater)
#load WasteWater_data into the environment
data(WasteWater_data, package = "DSIWastewater")

#get DF into format for buildRegressionEstimateTable
baseWaste_DF <-  buildWasteAnalysisDF(WasteWater_data)
baseWaste_DF$site <- ifelse(baseWaste_DF$site == "Madison MSD WWTF",
                            "Madison", baseWaste_DF$site)
baseWaste_DF <- baseWaste_DF[!(baseWaste_DF$site %in% c("Portage WWTF","Cedarburg WWTF")),]


baseWaste_DF <- baseWaste_DF[baseWaste_DF$n > 10,]
K=3
baseWaste_DF <- baseWaste_DF%>%
  group_by(site)%>%
    arrange(site, date)%>%
    #create K day mean of the same column to use later
    mutate(pastKavg.wwlog10 = rollmean(sars_cov2_adj_load_log10,
                                       K, align = "right",
                                       fill=NA))%>%
  ungroup()

#add quantile data to merge with the regression results
Quantiles_DF <- makeQuantileColumns(baseWaste_DF,
                                    5:9/10, c(14, 30, 60, 90),
                                    "sars_cov2_adj_load_log10")

Quantiles_DF <- Quantiles_DF[,c("site", "date", "window", "quant", "ntile", "pastKavg.wwlog10")]
Quantiles_DF <- tidyr::pivot_wider(Quantiles_DF, 
                                   names_from = c(window, quant), 
                                  values_from = c(ntile))
#Get 5 day rolling regression of data 
CDCMethod <- buildRegressionEstimateTable(baseWaste_DF,
                                          PSigTest=FALSE)

CDCMethod <- CDCMethod[,c("date","site", "modeled_percentchange", "lmreg_sig")]
#merge the regression DF and the quantile DF to get info for 

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
FULL_reg_DF <- left_join(CDCMethod, Quantiles_DF,
                                by = c("site", "date"))%>%
  tidyr::pivot_longer(cols = '14_0.5':'90_0.9',
                      names_to = c("window", "quant"),
                      values_to = "ntile",
                      names_sep = "_")#

#create flags described in @return
FULL_reg_DF <- classifyQuantileFlagRegression(FULL_reg_DF)

#return only flags and type columns 
Full_wasteFlags <- FULL_reg_DF[,c("site", "date",
                                  "window", "quant",
                                  "cdc_flag",
                                  "flag_ntile",
                                  "flag_ntile_Pval")]
data(Case_data, package = "DSIWastewater")

Case_DF <- Case_data

#get the case flags
Case_DF <- buildCaseAnalysisDF(Case_DF)


CaseRegressionOutput <- buildRegressionEstimateTable(DataMod = Case_DF, 
    RunOn = c("FirstConfirmed.Per100K", "pastwk.avg.casesperday.Per100K"),
    SplitOn = "site", DaysRegressed = 7)

case_flags_names <- c("case_flag", 
                      "case_flag_plus_comm.threshold",
                      "slope_switch_flag")

CaseRegressionOutput$Method <- ifelse(CaseRegressionOutput$Method ==   
                                      "FirstConfirmed.Per100K",
                                      "Cases", "7DayCases")


library(dplyr)
#Classify slope to create 3 flags described in @return  
CaseFlags <- classifyCaseRegression(CaseRegressionOutput)%>%
  select(Method, site, date,all_of(case_flags_names))%>%
  tidyr:::pivot_wider(names_from = "Method", values_from = all_of(case_flags_names))
Full_wasteFlags <- Full_wasteFlags%>%
  rename(cdc.flag = cdc_flag, 
         flag.ntile = flag_ntile, 
         flag.ntile.Pval = flag_ntile_Pval)%>%
  tidyr::pivot_wider(names_from = c(window, quant), 
                     values_from = c(cdc.flag, flag.ntile, flag.ntile.Pval))

Flag_DF <- full_join(CaseFlags, Full_wasteFlags, 
                     by = c("site", "date"))

date_Flag_DF <- DF_date_vector(Flag_DF, "date", 
               names(Flag_DF)[3:68])
#"case_flag_Cases"                         "case_flag_7DayCases"                    
#"case_flag_plus_comm.threshold_Cases"     "case_flag_plus_comm.threshold_7DayCases"
#"slope_switch_flag_Cases"                 "slope_switch_flag_7DayCases"
dep_flags <- names(Flag_DF)[9:68]
edgeThresh <- 7
CaseFlag <- "slope_switch_flag_Cases"
DateDistDF <- date_distance_calc(date_Flag_DF, CaseFlag, 
                                 dep_flags, edge = edgeThresh)%>%
  select(site, date, dep_flags)%>%
  tidyr::pivot_longer(cols = dep_flags,
                      names_to = c("FlagType","window", "quant"),
                      values_to = "FlagError",
                      names_sep = "_")%>%
  mutate(window = as.numeric(window), quant = as.numeric(quant))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(dep_flags)` instead of `dep_flags` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.

<Above has been done before by peter. the next section is where we hope to show value>

CaseNumberFlags <- sum(Flag_DF[[CaseFlag]], na.rm = TRUE)

Flag_DF%>%
  group_by(site)%>%
  summarise(across(c(dep_flags, !!sym(CaseFlag)), ~sum(.x, na.rm=TRUE)))%>%
  mutate(across(c(dep_flags), ~(.x-!!sym(CaseFlag))))%>%
  ungroup()%>%
  summarise(across(c(dep_flags), ~sum(abs(.x), na.rm=TRUE)))%>%
  tidyr::pivot_longer(cols = dep_flags,
                      names_to = c("FlagType","window", "quant"),
                      values_to = "TotalFlagCountDiff",
                      names_sep = "_")%>%
  arrange(TotalFlagCountDiff)
## # A tibble: 60 × 4
##    FlagType        window quant TotalFlagCountDiff
##    <chr>           <chr>  <chr>              <dbl>
##  1 flag.ntile.Pval 30     0.5                  968
##  2 flag.ntile.Pval 30     0.6                  983
##  3 flag.ntile      30     0.6                  984
##  4 flag.ntile      30     0.5                 1009
##  5 flag.ntile      60     0.5                 1009
##  6 flag.ntile      60     0.6                 1016
##  7 flag.ntile      90     0.5                 1017
##  8 flag.ntile      30     0.7                 1028
##  9 flag.ntile      90     0.6                 1033
## 10 flag.ntile      60     0.7                 1047
## # … with 50 more rows
DistSummary <- DateDistDF%>%
  group_by(window, quant, FlagType)%>%
  summarise(Mean = mean(FlagError, na.rm = TRUE),
            MeanErrorSquard = mean(FlagError^2, na.rm = TRUE),
            Var = var(FlagError, na.rm = TRUE),
            n = sum(!is.na(FlagError)),
            Missed = mean(FlagError == edgeThresh, na.rm = TRUE))
## `summarise()` has grouped output by 'window', 'quant'. You can override using
## the `.groups` argument.
Plot_DF_List <- split(DistSummary, DistSummary$FlagType)

plot_ly(showlegend = TRUE, opacity = .8)%>% 
  add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
            z = Plot_DF_List[[1]]$Mean, name = "cdc.flag",  type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
            z = Plot_DF_List[[2]]$Mean, name = "flag.ntile", type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
            z = Plot_DF_List[[3]]$Mean, name = "flag.ntile.Pval", type="mesh3d")%>%
  layout(scene= list(xaxis = list(title = 'window'),
         yaxis = list(title = 'quant'),
         zaxis = list(title = 'Mean')))
plot_ly(showlegend = TRUE, opacity = .8)%>% 
  add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
            z = Plot_DF_List[[1]]$MeanErrorSquard,
            name = "cdc.flag",  type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
            z = Plot_DF_List[[2]]$MeanErrorSquard,
            name = "flag.ntile", type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
            z = Plot_DF_List[[3]]$MeanErrorSquard,
            name = "flag.ntile.Pval", type="mesh3d")%>%
  layout(scene= list(xaxis = list(title = 'window'),
         yaxis = list(title = 'quant'),
         zaxis = list(title = 'MeanErrorSquard')))
plot_ly(showlegend = TRUE, opacity = .8)%>% 
  add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
            z = Plot_DF_List[[1]]$Var, name = "cdc.flag",  type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
            z = Plot_DF_List[[2]]$Var, name = "flag.ntile", type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
            z = Plot_DF_List[[3]]$Var, name = "flag.ntile.Pval", type="mesh3d")%>%
  layout(scene= list(xaxis = list(title = 'window'),
         yaxis = list(title = 'quant'),
         zaxis = list(title = 'Var')))
plot_ly(showlegend = TRUE, opacity = .8)%>% 
  add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
            z = Plot_DF_List[[1]]$Missed, name = "cdc.flag",  type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
            z = Plot_DF_List[[2]]$Missed, name = "flag.ntile",
            type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
            z = Plot_DF_List[[3]]$Missed,
            name = "flag.ntile.Pval", type="mesh3d")%>%
  layout(scene= list(xaxis = list(title = 'window'),
         yaxis = list(title = 'quant'),
         zaxis = list(title = 'Missed')))
plot_ly(showlegend = TRUE, opacity = .8)%>% 
  add_trace(x = Plot_DF_List[[1]]$window, y = Plot_DF_List[[1]]$quant,
            z = Plot_DF_List[[1]]$n, name = "cdc.flag",  type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[2]]$window, y = Plot_DF_List[[2]]$quant,
            z = Plot_DF_List[[2]]$n, name = "flag.ntile", type="mesh3d")%>% 
  add_trace(x = Plot_DF_List[[3]]$window, y = Plot_DF_List[[3]]$quant,
            z = Plot_DF_List[[3]]$n, name = "flag.ntile.Pval", type="mesh3d")%>%
  layout(scene= list(xaxis = list(title = 'window'),
         yaxis = list(title = 'quant'),
         zaxis = list(title = 'n')))
DateDistDF%>%
  ggplot()+
  stat_count(aes(x = FlagError, fill = FlagType,
                     y = ..prop..),
                 position = "dodge")+
  facet_grid(window~quant)
## Warning: Removed 2035987 rows containing non-finite values (stat_count).

DistSummary%>%
  arrange(MeanErrorSquard)
## # A tibble: 60 × 8
## # Groups:   window, quant [20]
##    window quant FlagType           Mean MeanErrorSquard   Var     n Missed
##     <dbl> <dbl> <chr>             <dbl>           <dbl> <dbl> <int>  <dbl>
##  1     90   0.9 flag.ntile.Pval  0.0120            3     3.01   334 0.0240
##  2     14   0.9 flag.ntile.Pval -0.172             3.14  3.22    29 0.0345
##  3     90   0.9 flag.ntile       0.0312            3.24  3.24   416 0.0264
##  4     14   0.9 flag.ntile       0.0678            3.25  3.31    59 0.0339
##  5     60   0.9 flag.ntile.Pval -0.0580            3.58  3.59   293 0.0239
##  6     90   0.8 flag.ntile.Pval -0.0382            4.16  4.16   550 0.0327
##  7     60   0.9 flag.ntile      -0.118             4.34  4.33   357 0.0280
##  8     90   0.8 flag.ntile      -0.0252            4.38  4.38   714 0.0350
##  9     60   0.8 flag.ntile.Pval -0.160             4.96  4.94   594 0.0303
## 10     90   0.7 flag.ntile.Pval -0.0328            5.02  5.02   671 0.0387
## # … with 50 more rows
A <- DistSummary%>%
  ggplot(aes(x = n, y = MeanErrorSquard, color = paste(window, quant)))+
  geom_point()+
  geom_vline(xintercept = CaseNumberFlags)

ggplotly(A)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
B <- DistSummary%>%
  ggplot(aes(x = n, y = Var, color = paste(window, quant)))+
  geom_point()+
  geom_vline(xintercept = CaseNumberFlags)

ggplotly(B)
#peters method to select outliers
SitePop <- baseWaste_DF[,c("site","population_served")]%>%
  group_by(site)%>%
  summarise(mean_pop_served = mean(population_served, na.rm = TRUE))

flag_Model_DF = DateDistDF %>%
  left_join(SitePop)%>%
  mutate(abs_difference = abs(FlagError),
         inverse_difference = 1/(FlagError+0.01)*100) %>%
  # Filter out differences >30
  filter(abs_difference <=30)
## Joining, by = "site"
testmodel = lm(inverse_difference ~ quant + window + date + mean_pop_served, 
               data = flag_Model_DF[which(!is.na(flag_Model_DF$inverse_difference)),])

summary(testmodel)
## 
## Call:
## lm(formula = inverse_difference ~ quant + window + date + mean_pop_served, 
##     data = flag_Model_DF[which(!is.na(flag_Model_DF$inverse_difference)), 
##         ])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6777  -6063   3546   3877   5107 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.631e+04  2.386e+03  11.029  < 2e-16 ***
## quant            5.255e+02  1.398e+02   3.759 0.000171 ***
## window           1.410e+00  6.739e-01   2.093 0.036392 *  
## date            -1.085e+00  1.268e-01  -8.558  < 2e-16 ***
## mean_pop_served -1.758e-03  1.245e-04 -14.121  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4859 on 64488 degrees of freedom
## Multiple R-squared:  0.005071,   Adjusted R-squared:  0.005009 
## F-statistic: 82.17 on 4 and 64488 DF,  p-value: < 2.2e-16